# Top departments (bar)
# 15 most-ordered departments: count the rows of `ic` per department, sorted
# from most to least frequent, and keep the first 15.
top_dept <- ic %>%
  dplyr::count(department, name = "n", sort = TRUE) %>%
  dplyr::slice_head(n = 15)

# The bar order is set on the data handed to ggplot() rather than inside
# aes(): ggplotly() labels hover text with the aes() expression, so an inline
# reorder(department, n) would show up verbatim in every tooltip. `top_dept`
# itself is left unmodified.
p_bar <- ggplot(
  dplyr::mutate(top_dept, department = reorder(department, n)),
  aes(x = department, y = n)
) +
  geom_col() +
  # Flip so departments run down the axis, largest count at the top
  coord_flip() +
  labs(
    title = "Top 15 departments by orders",
    x = NULL, y = "Number of products ordered"
  )

# Interactive version of the chart
plotly::ggplotly(p_bar)
# Orders by hour × day (line)
# Number of rows in `ic` for every day-of-week / hour combination that occurs;
# the result is ungrouped, with the count stored in `n`.
by_hour <- ic %>%
  dplyr::group_by(dow, hour) %>%
  dplyr::summarise(n = dplyr::n(), .groups = "drop")

# One line per day: `group` keeps each day's points joined as a separate
# series and `color` distinguishes the series.
# NOTE(review): if `dow` is numeric rather than a factor/character, ggplot2
# will apply a continuous colour gradient instead of discrete day colours --
# confirm the column type upstream.
p_line <- ggplot(
  by_hour,
  aes(x = hour, y = n, color = dow, group = dow)
) +
  geom_line() +
  labs(
    title = "Order volume by hour across days",
    x = "Hour of day",
    y = "Orders",
    color = "Day"
  )

# Interactive version; hover text is limited to the x, y and colour mappings
plotly::ggplotly(p_line, tooltip = c("x", "y", "color"))
# Aisle add-to-cart order (boxplot)
# Keep only aisles that have at least 400 rows in `ic`, then retain the 15
# most frequent of those as their own factor levels; fct_lump_n() pools the
# remaining aisles into a single lumped level ("Other" by default). Rows whose
# aisle is NA after lumping are dropped.
# NOTE(review): the lumped level is plotted as its own box alongside the
# individual aisles -- confirm that is intended for a "by aisle" chart.
aisle_box <- ic %>%
  dplyr::group_by(aisle) %>%
  dplyr::filter(dplyr::n() >= 400) %>%
  dplyr::ungroup() %>%
  dplyr::mutate(aisle = forcats::fct_lump_n(aisle, n = 15)) %>%
  dplyr::filter(!is.na(aisle))

# Aisle levels sorted by ascending median add-to-cart position
aisle_order <- aisle_box %>%
  dplyr::group_by(aisle) %>%
  dplyr::summarise(
    med = median(add_to_cart_order, na.rm = TRUE),
    .groups = "drop"
  ) %>%
  dplyr::arrange(med) %>%
  dplyr::pull(aisle)

# Re-level the factor so the boxes are drawn in median order
aisle_box$aisle <- factor(aisle_box$aisle, levels = aisle_order)

p_box <- ggplot(aisle_box, aes(x = aisle, y = add_to_cart_order)) +
  # Outlier points are drawn semi-transparent
  geom_boxplot(outlier.alpha = 0.2) +
  # Flip so aisle names run down the axis
  coord_flip() +
  labs(
    title = "Add-to-cart order distribution by aisle",
    x = NULL,
    y = "Add-to-cart order (position in the cart)"
  )

# Interactive version; hover text is limited to the x and y mappings
plotly::ggplotly(p_box, tooltip = c("x", "y"))